{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "<a target=\"_blank\" href=\"https://colab.research.google.com/github/cohere-ai/notebooks/blob/main/notebooks/What_is_Semantic_Search.ipynb\">\n",
        "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
        "</a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QrZoppfP32yt"
      },
      "source": [
        "# What is Semantic Search?\n",
        "In this codelab you'll build a very basic semantic search model on a small dataset using the _embed_ function.\n",
        "\n",
        "Read the accompanying blog post [here](https://txt.cohere.ai/what-is-semantic-search/)."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "KfHExgpe3-WU",
        "outputId": "793e8110-91c5-4d9b-9fc5-9a04f889df91"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting cohere\n",
            "  Downloading cohere-3.5.0.tar.gz (13 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting umap-learn\n",
            "  Downloading umap-learn-0.5.3.tar.gz (88 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m88.2/88.2 KB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: altair in /usr/local/lib/python3.8/dist-packages (4.2.2)\n",
            "Collecting annoy\n",
            "  Downloading annoy-1.17.1.tar.gz (647 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m648.0/648.0 KB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting datasets\n",
            "  Downloading datasets-2.9.0-py3-none-any.whl (462 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m462.8/462.8 KB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (4.64.1)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from cohere) (2.25.1)\n",
            "Collecting urllib3~=1.26\n",
            "  Downloading urllib3-1.26.14-py2.py3-none-any.whl (140 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.6/140.6 KB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from umap-learn) (1.21.6)\n",
            "Requirement already satisfied: scikit-learn>=0.22 in /usr/local/lib/python3.8/dist-packages (from umap-learn) (1.0.2)\n",
            "Requirement already satisfied: scipy>=1.0 in /usr/local/lib/python3.8/dist-packages (from umap-learn) (1.7.3)\n",
            "Requirement already satisfied: numba>=0.49 in /usr/local/lib/python3.8/dist-packages (from umap-learn) (0.56.4)\n",
            "Collecting pynndescent>=0.5\n",
            "  Downloading pynndescent-0.5.8.tar.gz (1.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: toolz in /usr/local/lib/python3.8/dist-packages (from altair) (0.12.0)\n",
            "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.8/dist-packages (from altair) (4.3.3)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from altair) (2.11.3)\n",
            "Requirement already satisfied: pandas>=0.18 in /usr/local/lib/python3.8/dist-packages (from altair) (1.3.5)\n",
            "Requirement already satisfied: entrypoints in /usr/local/lib/python3.8/dist-packages (from altair) (0.4)\n",
            "Collecting huggingface-hub<1.0.0,>=0.2.0\n",
            "  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.3/190.3 KB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from datasets) (23.0)\n",
            "Collecting xxhash\n",
            "  Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 KB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting responses<0.19\n",
            "  Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
            "Requirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (6.0)\n",
            "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
            "Collecting multiprocess\n",
            "  Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.0/132.0 KB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
            "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.4.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.9.0)\n",
            "Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.8/dist-packages (from jsonschema>=3.0->altair) (5.10.2)\n",
            "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.8/dist-packages (from jsonschema>=3.0->altair) (0.19.3)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.8/dist-packages (from numba>=0.49->umap-learn) (6.0.0)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.49->umap-learn) (0.39.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from numba>=0.49->umap-learn) (57.4.0)\n",
            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas>=0.18->altair) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas>=0.18->altair) (2022.7.1)\n",
            "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from pynndescent>=0.5->umap-learn) (1.2.0)\n",
            "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->cohere) (4.0.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->cohere) (2022.12.7)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->cohere) (2.10)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn>=0.22->umap-learn) (3.1.0)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.8/dist-packages (from jinja2->altair) (2.0.1)\n",
            "Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from importlib-resources>=1.4.0->jsonschema>=3.0->altair) (3.12.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas>=0.18->altair) (1.15.0)\n",
            "Building wheels for collected packages: cohere, umap-learn, annoy, pynndescent\n",
            "  Building wheel for cohere (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for cohere: filename=cohere-3.5.0-cp38-cp38-linux_x86_64.whl size=16265 sha256=589f6e6e07d0a40354b5776a95938b35557a4948817a583cd74e134021b9ba5a\n",
            "  Stored in directory: /root/.cache/pip/wheels/c3/2c/25/0696f1aa599c730e68d48caafb6fc8ff2b1870ea451336e7ff\n",
            "  Building wheel for umap-learn (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for umap-learn: filename=umap_learn-0.5.3-py3-none-any.whl size=82829 sha256=39db2f88e8b8c5b364fe5e23027929041b07afd0f7add3c58d4bc61d48c03efe\n",
            "  Stored in directory: /root/.cache/pip/wheels/a9/3a/67/06a8950e053725912e6a8c42c4a3a241410f6487b8402542ea\n",
            "  Building wheel for annoy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for annoy: filename=annoy-1.17.1-cp38-cp38-linux_x86_64.whl size=582783 sha256=a836798518df85e625578eaa22ecda579c681fa192d66e7c48f229b2edc962aa\n",
            "  Stored in directory: /root/.cache/pip/wheels/f9/93/19/30511c4a9ae6b4937455a134c34a39e13943e2c6f46fcd2ed2\n",
            "  Building wheel for pynndescent (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pynndescent: filename=pynndescent-0.5.8-py3-none-any.whl size=55513 sha256=27edfd29a59bd8d247ba23325f416c17e088409d9fffec48125ca17033db3d77\n",
            "  Stored in directory: /root/.cache/pip/wheels/1c/63/3a/29954bca1a27ba100ed8c27973a78cb71b43dc67aed62e80c3\n",
            "Successfully built cohere umap-learn annoy pynndescent\n",
            "Installing collected packages: annoy, xxhash, urllib3, multiprocess, responses, pynndescent, huggingface-hub, cohere, umap-learn, datasets\n",
            "  Attempting uninstall: urllib3\n",
            "    Found existing installation: urllib3 1.24.3\n",
            "    Uninstalling urllib3-1.24.3:\n",
            "      Successfully uninstalled urllib3-1.24.3\n",
            "Successfully installed annoy-1.17.1 cohere-3.5.0 datasets-2.9.0 huggingface-hub-0.12.0 multiprocess-0.70.14 pynndescent-0.5.8 responses-0.18.0 umap-learn-0.5.3 urllib3-1.26.14 xxhash-3.2.0\n"
          ]
        }
      ],
      "source": [
        "# Install the notebook's dependencies:\n",
        "#   cohere     - embeddings\n",
        "#   umap-learn - reduces the embeddings to 2 dimensions\n",
        "#   altair     - visualization\n",
        "#   annoy      - approximate nearest neighbor search\n",
        "#   datasets, tqdm - also imported by this notebook\n",
        "# `%pip` (rather than `!pip`) installs into the environment of the running kernel.\n",
        "# TODO: upgrade to \"cohere>5\"\n",
        "%pip install \"cohere<5\" umap-learn altair annoy datasets tqdm"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "jrtPurK92qD6"
      },
      "outputs": [],
      "source": [
        "#@title Import libraries (Run this cell to execute required code) {display-mode: \"form\"}\n",
        "\n",
        "import os\n",
        "import re\n",
        "import warnings\n",
        "from getpass import getpass\n",
        "\n",
        "import altair as alt\n",
        "import cohere\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import umap\n",
        "from annoy import AnnoyIndex\n",
        "from datasets import load_dataset\n",
        "from sklearn.metrics.pairwise import cosine_similarity\n",
        "from tqdm import tqdm\n",
        "\n",
        "warnings.filterwarnings('ignore')\n",
        "pd.set_option('display.max_colwidth', None)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "agXt3qRK5N8o"
      },
      "outputs": [],
      "source": [
        "# Create and retrieve a Cohere API key from os.cohere.ai\n",
        "# Paste your API key between the quotes, or leave it empty to have it read from\n",
        "# the COHERE_API_KEY environment variable (falling back to a hidden prompt).\n",
        "# Remember not to share a notebook that contains your key.\n",
        "api_key = ''\n",
        "if not api_key:\n",
        "    # Nothing was pasted: look the key up outside the notebook so it is never saved in it.\n",
        "    api_key = os.environ.get('COHERE_API_KEY') or getpass('Cohere API key: ')\n",
        "\n",
        "# Client used for the Cohere API calls below (e.g. co.embed)\n",
        "co = cohere.Client(api_key)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xfwpfElZ0rjJ"
      },
      "source": [
        "# The dataset"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 300
        },
        "id": "P6uurRiy3-Gu",
        "outputId": "3efeb1b2-c663-4fd3-edb8-666fa7597465"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-b52c681a-6db6-41c8-adac-f9654f9d5afc\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>text</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Where is the world cup?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>The world cup is in Qatar</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>What color is the sky?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>The sky is blue</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Where does the bear live?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>The bear lives in the the woods</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>What is an apple?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>An apple is a fruit</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b52c681a-6db6-41c8-adac-f9654f9d5afc')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-b52c681a-6db6-41c8-adac-f9654f9d5afc button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-b52c681a-6db6-41c8-adac-f9654f9d5afc');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ],
            "text/plain": [
              "                              text\n",
              "0          Where is the world cup?\n",
              "1        The world cup is in Qatar\n",
              "2           What color is the sky?\n",
              "3                  The sky is blue\n",
              "4        Where does the bear live?\n",
              "5  The bear lives in the the woods\n",
              "6                What is an apple?\n",
              "7              An apple is a fruit"
            ]
          },
          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Eight sentences: four questions, each immediately followed by its answer.\n",
        "# NOTE(review): 'the the woods' looks like a typo; kept verbatim so the data is unchanged.\n",
        "sentences = [\n",
        "    'Where is the world cup?',\n",
        "    'The world cup is in Qatar',\n",
        "    'What color is the sky?',\n",
        "    'The sky is blue',\n",
        "    'Where does the bear live?',\n",
        "    'The bear lives in the the woods',\n",
        "    'What is an apple?',\n",
        "    'An apple is a fruit',\n",
        "]\n",
        "\n",
        "# Single-column frame; the column name 'text' is what the later cells read.\n",
        "qa_df = pd.DataFrame({'text': sentences})\n",
        "\n",
        "qa_df"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VWUOlOD80tUN"
      },
      "source": [
        "# Creating the embedding"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "gKcbOZ7y4Wc9"
      },
      "outputs": [],
      "source": [
        "# NOTE(review): `embeddings` is not referenced in this cell - confirm a later cell needs this import.\n",
        "from cohere import embeddings\n",
        "\n",
        "# Send every sentence of qa_df to the embed endpoint in a single call\n",
        "# and keep only the embeddings from the response.\n",
        "sentence_texts = qa_df['text'].tolist()\n",
        "embed_response = co.embed(texts=sentence_texts, model='large', truncate='LEFT')\n",
        "qa = embed_response.embeddings"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YAPQbyo_0vEu"
      },
      "source": [
        "# Plotting the embedding in 2D"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 468
        },
        "id": "U9eRFb5X50wA",
        "outputId": "32358ca2-e1fd-4c04-ef5d-91cecc43e195"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "<div id=\"altair-viz-8c4f0ad4f1314f92b275bfae17336348\"></div>\n",
              "<script type=\"text/javascript\">\n",
              "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
              "  (function(spec, embedOpt){\n",
              "    let outputDiv = document.currentScript.previousElementSibling;\n",
              "    if (outputDiv.id !== \"altair-viz-8c4f0ad4f1314f92b275bfae17336348\") {\n",
              "      outputDiv = document.getElementById(\"altair-viz-8c4f0ad4f1314f92b275bfae17336348\");\n",
              "    }\n",
              "    const paths = {\n",
              "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
              "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
              "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
              "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
              "    };\n",
              "\n",
              "    function maybeLoadScript(lib, version) {\n",
              "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
              "      return (VEGA_DEBUG[key] == version) ?\n",
              "        Promise.resolve(paths[lib]) :\n",
              "        new Promise(function(resolve, reject) {\n",
              "          var s = document.createElement('script');\n",
              "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
              "          s.async = true;\n",
              "          s.onload = () => {\n",
              "            VEGA_DEBUG[key] = version;\n",
              "            return resolve(paths[lib]);\n",
              "          };\n",
              "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
              "          s.src = paths[lib];\n",
              "        });\n",
              "    }\n",
              "\n",
              "    function showError(err) {\n",
              "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
              "      throw err;\n",
              "    }\n",
              "\n",
              "    function displayChart(vegaEmbed) {\n",
              "      vegaEmbed(outputDiv, spec, embedOpt)\n",
              "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
              "    }\n",
              "\n",
              "    if(typeof define === \"function\" && define.amd) {\n",
              "      requirejs.config({paths});\n",
              "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
              "    } else {\n",
              "      maybeLoadScript(\"vega\", \"5\")\n",
              "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
              "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
              "        .catch(showError)\n",
              "        .then(() => displayChart(vegaEmbed));\n",
              "    }\n",
              "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-4dbce3df3abc345d2b553d71739fbaf6\"}, \"mark\": {\"type\": \"circle\", \"size\": 60}, \"encoding\": {\"tooltip\": [{\"field\": \"text\", \"type\": \"nominal\"}], \"x\": {\"field\": \"x\", \"scale\": {\"zero\": false}, \"type\": \"quantitative\"}, \"y\": {\"field\": \"y\", \"scale\": {\"zero\": false}, \"type\": \"quantitative\"}}, \"height\": 400, \"selection\": {\"selector001\": {\"type\": \"interval\", \"bind\": \"scales\", \"encodings\": [\"x\", \"y\"]}}, \"width\": 700, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-4dbce3df3abc345d2b553d71739fbaf6\": [{\"text\": \"Where is the world cup?\", \"x\": -3.640042304992676, \"y\": 20.470325469970703}, {\"text\": \"The world cup is in Qatar\", \"x\": -3.995410203933716, \"y\": 20.824081420898438}, {\"text\": \"What color is the sky?\", \"x\": -4.402554035186768, \"y\": 14.899083137512207}, {\"text\": \"The sky is blue\", \"x\": -4.666635513305664, \"y\": 15.162392616271973}, {\"text\": \"Where does the bear live?\", \"x\": 4.009443759918213, \"y\": 9.762710571289062}, {\"text\": \"The bear lives in the the woods\", \"x\": 3.6688802242279053, \"y\": 9.4224214553833}, {\"text\": \"What is an apple?\", \"x\": 1.89143705368042, \"y\": -3.7451298236846924}, {\"text\": \"An apple is a fruit\", \"x\": 1.6007134914398193, \"y\": -4.035754203796387}]}}, {\"mode\": \"vega-lite\"});\n",
              "</script>"
            ],
            "text/plain": [
              "alt.Chart(...)"
            ]
          },
          "execution_count": 17,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# UMAP projects the high-dimensional embeddings down to 2 dimensions that we can plot.\n",
        "# random_state is fixed so that re-running the cell reproduces the same layout.\n",
        "reducer = umap.UMAP(n_neighbors=2, random_state=42)\n",
        "umap_embeds = reducer.fit_transform(qa)\n",
        "\n",
        "# Prepare the data to plot an interactive visualization using Altair.\n",
        "# assign() returns a new frame, so qa_df itself is not given the plot columns.\n",
        "df_explore = qa_df.assign(x=umap_embeds[:, 0], y=umap_embeds[:, 1])\n",
        "\n",
        "# Plot: one circle per sentence, with the sentence text as tooltip.\n",
        "# zero=False lets each axis fit the data instead of being forced to include 0.\n",
        "chart = alt.Chart(df_explore).mark_circle(size=60).encode(\n",
        "    x=alt.X('x', scale=alt.Scale(zero=False)),\n",
        "    y=alt.Y('y', scale=alt.Scale(zero=False)),\n",
        "    tooltip=['text']\n",
        ").properties(\n",
        "    width=700,\n",
        "    height=400\n",
        ")\n",
        "chart.interactive()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6Sr9U66jzwf_"
      },
      "source": [
        "# Plotting the cosine similarities"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 269
        },
        "id": "a6ScQzHA49Ds",
        "outputId": "c0310077-0b62-4042-d8f8-fb80a9d21d99"
      },
      "outputs": [
        {
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAD8CAYAAADUv3dIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAX4ElEQVR4nO3df7RdZZ3f8fcnFyIDQdDBCs3NQEaDThynglmxLKeWDsIEZRGp7TSZThWWw7WrxJ+ddrC6EHGmjjNWq4ssOxmMSh1JHabaO5oRXArjMBVMUEASDF6j09z4I1IQRH7l3vvpH2cHDre5Z5+bnLP3vjufV9ZenP3jPPubLNb3fu+zn/08sk1ERFRjUd0BREQcSZJ0IyIqlKQbEVGhJN2IiAol6UZEVChJNyKiQkm6ERFzkLRZ0j5Jd89xXpI+ImlC0l2SzixrM0k3ImJunwDW9Dh/PrCi2MaAj5Y1mKQbETEH218F7u9xyVrgWnfcCpwo6ZRebR41yAAPZv99u2t/5e2Rt11adwgAXPCVuiOAYxctrjsEAJ696Bl1h8D9M4/XHQIAux/bV3cI3Pfog3WHAMADD0/ocNuYT85Z/JznvZFOhXrAJtub5nG7pcCerv3J4tgP5/rC0JNuRERTFQl2Pkn2sCXpRkS7zExXebe9wLKu/dHi2JzSpxsR7TI91f92+MaB1xWjGP4x8KDtObsWIJVuRLSMPTOwtiRdB5wNnCRpEng3cHTnPv5vwFbgVcAE8AhwSVmbSboR0S4zg0u6tteXnDdw2XzaTNKNiHYZYKU7DEm6EdEu1T5Im7ck3Yhol1S6ERHV8WBGJQxNkm5EtMsAH6QNQ5JuRLRLuhciIiq00B+kSXohnZl0lhaH9gLjtu8ZZmAREYek4ZVuz9eAJf0+sAUQ8PViE3CdpMt7fG9M0nZJ26+59rpBxhsR0Vu1rwHPW1ml+wbgRbb3dx+U9EFgB/BHB/tS98w9TZjaMSKOIAv8QdoM8A+Bv591/JTiXEREo9gLu0/3rcCXJX2Hpybq/SXg+cCGYQYWEXFIGt6n2zPp2v6ipNOB1Tz9Qdo2N/3HSUQcmRZ49wLuzJN2awWxREQcvoVc6UZELDjT+8uvqVGSbkS0S8O7F7JcT0S0i2f630pIWiNpl6SJg72bIOlUSV+WdJekmyWNlrWZpBsR7TIz0//Wg6QRYCNwPrASWC9p5azLPgBca/vXgKuA95WFl6QbEe0yoKRLZ9TWhO3dtp+g83bu2lnXrAS+Uny+6SDn/z9JuhHRKp7e3/fWPWVBsY11NbWUp95PAJjkqaGzB9wJ/PPi80XA8ZJ+sVd8eZAWEe0yjyFj3VMWHKLfA66WdDHwVTrvMfR8h2HoSfeRt1067FuUOvZDf1Z3CABMvuCiukPgrCW/XHcIANzys4m6Q2iMHzx8f90hcPKSZ9UdwuAMbvTCXmBZ1/5ocexJtn9AUelKWgK81vZPezWa7oWIaJfBjV7YBqyQtFzSYmAdMN59gaSTJB3Io+8ANpc1mqQbEe0yoAdptqfozDFzA3AP8BnbOyRdJenC4rKzgV2S7gWeC/xhWXjp042Idhnga8C2twJbZx27ouvz9cD182kzSTci2mUqqwFHRFQnE95ERFSo4XMvJOlGRLuk0o2IqFAq3YiICqXSjYioUEYvRERUyK47gp6SdCOiXdKnGxFRoYYn3UOee0HSJT3OPTlH5Se+s3euyyIiBm+Ay/UMw+FMePOeuU7Y3mR7le1VF6+YPedvRMQQTU/3v9WgZ/eCpLvmOkVnRp2IiGZpePdCWZ/uc4HfBB6YdVzA/x5KRBERh2OBJ93PA0ts3zH7hKSbhxJRRMThWMgvR9h+Q49zvz34cCIiDo9nmj1ONytHRES7DG4JdiStkbRL0oSkyw9y/pck3STpm5LukvSqsjYzTjci2mVAoxIkjQAbgXPpLL++TdK47Z
1dl72LzjI+H5W0ks4qE6f1ajdJNyLaZXAP0lYDE7Z3A0jaAqwFupOugWcWn08AflDWaJJuRLTL4JLuUmBP1/4k8LJZ11wJ3CjpTcBxwCvLGk2fbkS0i9331v32bLGNzfNu64FP2B4FXgX8964l2Q8qlW5EtMs8Kl3bm4BNc5zeCyzr2h8tjnV7A7CmaOtrko4BTgL2zXXPVLoR0S4z7n/rbRuwQtJySYuBdcD4rGv+D3AOgKRfAY4BftKr0aFXuhd8Zdh3KDf5govqDgGAe3d9tu4QeOzKDXWHAMDpn6rnvfduzz/ulLpDAOCs459Xdwh8b//sl04XsAGNXrA9JWkDcAMwAmy2vUPSVcB22+PAvwf+TNLb6DxUu9juPaFvuhciolU8wNeAbW+lMwys+9gVXZ93Ai+fT5tJuhHRLg1/Iy1JNyLaZSHPvRARseCk0o2IqNBU/Q9pe0nSjYh2SfdCRESF0r0QEVGdQQ4ZG4Yk3Yhol1S6EREVStKNiKhQTUur9ytJNyJaZcGvkSbphZLOkbRk1vE1wwsrIuIQDW6WsaHomXQlvRn4X8CbgLslre06/Z97fO/JiYF/9PPZ009GRAzRABemHIay7oVLgZfafljSacD1kk6z/WFAc32pe2Lgf7L0nGbX+hHRLg3vXihLuotsPwxg+/uSzqaTeE+lR9KNiKhNw5NuWZ/ujyW95MBOkYAvoLMcxYuHGVhExKHw9EzfWx3KKt3XAVPdB2xPAa+T9KdDiyoi4lAt5ErX9qTtH81x7u+GE1JExKHzjPveykhaI2mXpAlJlx/k/Ick3VFs90r6aVmbGacbEe0yoEpX0giwETgXmAS2SRovlugBwPbbuq5/E3BGWbtZDTgi2mVmHltvq4EJ27ttPwFsAdb2uH49cF1Zo6l0I6JVPNX/AzJJY8BY16FNxZBXgKXAnq5zk8DL5mjnVGA5ULr+eZJuRLTLPAYldL9TcJjWAdfbLp34IUk3IlplgHMv7AWWde2PFscOZh1wWT+Npk83ItplcH2624AVkpZLWkwnsY7PvkjSC4FnAV/rJ7xUuhHRKoOqdG1PSdoA3ACMAJtt75B0FbDd9oEEvA7YYruvGw896R67aPGwb1HqrCW/XHcIADx25Ya6Q+CYK6+uOwQAjrnuorpD4LyjTq47BAB28WjdIXDR0cvKL1ooBviime2twNZZx66YtX/lfNpMpRsRreKp8mvqlKQbEa3S8BXYk3QjomWSdCMiqpNKNyKiQkm6EREV8nSz11dI0o2IVkmlGxFRIc+k0o2IqEwq3YiICtmpdCMiKpNKNyKiQjMLffSCpNWAbW+TtBJYA3y7mAgiIqJRFvSDNEnvBs4HjpL0JTpLVdwEXC7pDNt/OMf3nlwCY+WJL2J0SYtmMIqIRlvQSRf4F8BLgGcAPwJGbT8k6QPAbcBBk273Ehi/uez8Zi9CHxGt0t+stvUpS7pTxZo/j0j6ru2HAGw/Kqnh3dURcSRqeqVbtlzPE5KOLT6/9MBBSSfQ+Ll8IuJIZKvvrYykNZJ2SZqQdPkc1/yWpJ2Sdkj6dFmbZZXuK2w/3vmLPG0gxtHA60sjjoio2PSARi9IGgE2AufSWX59m6Rx2zu7rlkBvAN4ue0HJP2DsnZ7Jt0DCfcgx+8D7ptH/BERlRjgyxGrgQnbuwEkbQHWAju7rrkU2Gj7gc69va+s0awGHBGt4hn1vUkak7S9axvramopsKdrf7I41u104HRJfyfpVklryuLLyxER0SrzGb3QPdLqEB0FrADOBkaBr0p6se2f9vpCRERrDHD0wl6g+yWD0eJYt0ngNtv7ge9JupdOEt42V6PpXoiIVpmeWdT3VmIbsELSckmLgXXA+KxrPkenykXSSXS6G3b3ajSVbkS0yqBejrA9JWkDcAMwAmy2vUPSVcB22+PFufMk7QSmgf9g+//2ajdJNyJaZWaAUzsWc8xsnXXsiq7PBt5ebH1J0o2IVsl8uhERFVrocy8ctm
cvesawb1Hqlp9N1B0CAKd/arruEDjmuovqDgGAe3d9tu4Q2P/xP6g7BABOf9+ddYfAl6an6g4B6LzadbgG2b0wDKl0I6JV+hiVUKsk3YholYb3LiTpRkS7pHshIqJCGb0QEVGhpk/0naQbEa1iUulGRFRmKt0LERHVSaUbEVGh9OlGRFQolW5ERIVS6UZEVGi64ZXuvF9SlnTtMAKJiBiEGfW/1aFnpStp9tIUAv6ZpBMBbF84x/fGgDGAVc/+Rzx/yWmHH2lERB9mBljpFqv7fpjOyhHX2P6jWecvBv6Ep9ZOu9r2Nb3aLOteGKWzxvs1dOaRELAK+C+9vtS9wub6U1/T9PknIqJFBpVwJI0AG4Fz6SxAuU3SuO2dsy79H7Y39NtuWffCKuB24J3Ag7ZvBh61/Te2/6bv6CMiKjIzj63EamDC9m7bTwBbgLWHG1/PpGt7xvaHgEuAd0q6mjx8i4gGm5H63iSNSdretY11NbUU2NO1P1kcm+21ku6SdL2kZQc5/zR9JVDbk8C/lPRq4KF+vhMRUYf5rM/S3RV6iP4KuM7245LeCHwS+I1eX5hX1Wr7C8AXDj2+iIjhGuCohL1Ad+U6ylMPzACYtdz6NcAflzXa7HUtIiLmaQb1vZXYBqyQtFzSYmAd8LQRXZJO6dq9ELinrNH0z0ZEqwxq9ILtKUkbgBvoDBnbbHuHpKuA7bbHgTdLuhCYAu4HLi5rN0k3IlplkC892N4KbJ117Iquz+9gnosYJ+lGRKtk7oWIiApNN3vqhSTdiGiXVLoRERU64pPu/TOPD/sWC8bzjzul/KIhO++ok+sOAYD9H/+DukPg6EveVXcIALzoI5fVHQJLR46rO4SBafgSaal0I6JdjvhKNyKiSvN5DbgOSboR0Sp1TU7eryTdiGiVdC9ERFQoSTciokJNX6omSTciWiV9uhERFcrohYiICs00vIMhSTciWiUP0iIiKtTsOjfL9UREywxwCXYkrZG0S9KEpMt7XPdaSZa0qqzNeVW6kn6dzlrwd9u+cT7fjYiowpQGU+tKGgE2AufSWX59m6Rx2ztnXXc88Bbgtn7a7VnpSvp61+dLgauB44F3l2T9J9eSn3x4z1yXRUQMnOexlVgNTNjebfsJYAuw9iDXvRd4P/BYP/GVdS8c3fV5DDjX9nuA84B/PdeXbG+yvcr2qtEly+a6LCJi4ObTvdBdIBbbWFdTS4HuqnGyOPYkSWcCy2x/od/4yroXFkl6Fp3kLNs/AbD9c0lT/d4kIqIq8xkyZnsTsOlQ7iNpEfBB+lgBuFtZ0j0BuB0QYEmn2P6hpCXFsYiIRhng6IW9QPev6qPFsQOOB34VuFkSwMnAuKQLbW+fq9GeSdf2aXOcmgEuKo85IqJaAxynuw1YIWk5nWS7DvjtAydtPwicdGBf0s3A7/VKuHCI43RtPwJ871C+GxExTNMDqnVtT0naANwAjACbbe+QdBWw3fb4obSblyMiolUG+Uaa7a3A1lnHrpjj2rP7aTNJNyJaxQ1/Jy1JNyJaJXMvRERUKLOMRURUqNkpN0k3IlpmquFpN0k3IlrliH+QtvuxfcO+RakfPHx/3SEAcNbxz6s7BHbxaN0hAHD6++6sOwRe9JHL6g4BgM9/c2PdITD1uavrDmFg8iAtIqJCR3ylGxFRpVS6EREVmnYq3YiIymScbkREhdKnGxFRofTpRkRUKN0LEREVSvdCRESFmj56oWw14IiIBWUG972VkbRG0i5JE5IuP8j5fyvpW5LukHSLpJVlbSbpRkSrzGcJ9l4kjQAbgfOBlcD6gyTVT9t+se2XAH9MZ3XgnpJ0I6JVPI8/JVYDE7Z3234C2AKsfdq97Ie6do+jj5kleyZdSS+T9Mzi8y9Ieo+kv5L0fkknlDUeEVG1+XQvSBqTtL1rG+tqaimwp2t/sjj2NJIuk/RdOpXum8viK6t0NwOPFJ8/DJwAvL849vG5vtT9F3nwsfvKYoiIGBjb89k22V
7VtW06hPtttP084PeBd5VdXzZ6YZHtqeLzKttnFp9vkXRHjyA2AZsAVjznpc1+lBgRrTKoJdiBvcCyrv3R4thctgAfLWu0rNK9W9Ilxec7Ja0CkHQ6sL+s8YiIqg1w9MI2YIWk5ZIWA+uA8e4LJK3o2n018J2yRssq3d8FPizpXcB9wNck7aHTz/G7ZY1HRFTNAxqna3tK0gbgBmAE2Gx7h6SrgO22x4ENkl5Jpwh9AHh9Wbs9k67tB4GLi4dpy4vrJ23/+PD+OhERwzHI14BtbwW2zjp2Rdfnt8y3zb7eSCuGRdS/vkpERIm8BhwRUaGmvwacpBsRrZJZxiIiKpSkGxFRoUGNXhiWJN2IaJVUuhERFcrohYiICk272aukDT3p3vfog8O+RamTlzyr7hAA+N7+B+oOgYuOXlZ+UQW+ND1VftGQLR05ru4QAJj63NV1h8BRr9lQdwgDkz7diIgKpU83IqJC6dONiKjQTLoXIiKqk0o3IqJCR/zohYiIKqV7ISKiQk3vXsgS7BHRKjN231sZSWsk7ZI0Ienyg5x/u6Sdku6S9GVJp5a1maQbEa3iefzpRdIIsBE4H1gJrJe0ctZl36SzaO+vAdfTWYa9pyTdiGiVaU/3vZVYDUzY3m37CTqr/a7tvsD2TbYfKXZvpbNicE9JuhHRKrb73iSNSdretY11NbWUziK8B0wWx+byBuCvy+LLg7SIaJX5vAZsexOw6XDvKel3gFXAPy27tmelK+nNkpoxQ0pERB/mU+mW2At057/R4tjTFEuwvxO40PbjZY2WdS+8F7hN0t9K+neSnlPWYBHEkyX74/sf6ucrEREDMcDRC9uAFZKWS1oMrAPGuy+QdAbwp3QS7r5+4itLurvpZPf3Ai8Fdkr6oqTXSzp+ri/Z3mR7le1Vzzj6mf3EERExEIMavWB7CtgA3ADcA3zG9g5JV0m6sLjsT4AlwF9IukPS+BzNPamsT9e2Z4AbgRslHU1n+MR64ANAX5VvRERVBvkasO2twNZZx67o+vzK+bZZlnQ162b76ZTX45KOne/NIiKGbaFPYv6v5jrRNTYtIqIxFvTcC7bvrSqQiIhBWOiVbkTEgpLleiIiKpRKNyKiQpnEPCKiQgv6QVpExEKT7oWIiAo1feWIJN2IaJVUuhERFWp6n66a/lMBOrOWFfNeHtExNCWOJsTQlDiaEENT4mhCDAvBQlk5Yqz8kqFrQgzQjDiaEAM0I44mxADNiKMJMTTeQkm6ERGtkKQbEVGhhZJ0m9BP1IQYoBlxNCEGaEYcTYgBmhFHE2JovAXxIC0ioi0WSqUbEdEKSboRERVqdNKVtEbSLkkTki6vKYbNkvZJuruO+xcxLJN0k6SdknZIektNcRwj6euS7izieE8dcRSxjEj6pqTP1xjD9yV9q1iQcHtNMZwo6XpJ35Z0j6SzaojhBcW/wYHtIUlvrTqOhaKxfbqSRoB7gXOBSTrLIa+3vbPiOF4BPAxca/tXq7x3VwynAKfY/kaxCvPtwGtq+LcQcJzth4tFSm8B3mL71irjKGJ5O7AKeKbtC6q+fxHD94FVtu+r4/5FDJ8E/tb2NcUy4cfa/mmN8YwAe4GX2f77uuJosiZXuquBCdu7bT8BbAHWVh2E7a8C91d931kx/ND2N4rPP6OzHPTSGuKw7YeL3aOLrfKf2pJGgVcD11R97yaRdALwCuBjALafqDPhFs4BvpuEO7cmJ92lwJ6u/UlqSDRNI+k04AzgtpruPyLpDmAf8CXbdcTxX4H/CNQ9W7WBGyXdLqmOt7GWAz8BPl50tVwj6bga4ui2Driu5hgarclJN2aRtAT4S+Ctth+qIwbb07ZfAowCqyVV2uUi6QJgn+3bq7zvHH7d9pnA+cBlRVdUlY4CzgQ+avsM4OdALc8+AIrujQuBv6grhoWgyUl3L7Csa3+0OHZEKvpQ/xL4c9v/s+54il9jbwLWVHzrlwMXFv2pW4DfkPSpimMAwPbe4r/7gM
/S6RKr0iQw2fXbxvV0knBdzge+YfvHNcbQeE1OutuAFZKWFz9B1wHjNcdUi+IB1seAe2x/sMY4niPpxOLzL9B5yPntKmOw/Q7bo7ZPo/P/xFds/06VMQBIOq54qEnxK/15QKUjXGz/CNgj6QXFoXOASh+uzrKedC2Uaux8uranJG0AbgBGgM22d1Qdh6TrgLOBkyRNAu+2/bGKw3g58G+AbxX9qQD/yfbWiuM4Bfhk8YR6EfAZ27UN2arZc4HPdn4echTwadtfrCGONwF/XhQmu4FLaojhwA+ec4E31nH/haSxQ8YiItqoyd0LERGtk6QbEVGhJN2IiAol6UZEVChJNyKiQkm6EREVStKNiKjQ/wPGzRHLTwLFiAAAAABJRU5ErkJggg==",
            "text/plain": [
              "<Figure size 432x288 with 2 Axes>"
            ]
          },
          "metadata": {
            "needs_background": "light"
          },
          "output_type": "display_data"
        }
      ],
      "source": [
        "import seaborn as sb\n",
        "\n",
        "def plot_similarities(data, embedding):\n",
        "  similarities = []\n",
        "\n",
        "  for i in range(len(data)):\n",
        "    similarities.append([])\n",
        "    for j in range(len(data)):\n",
        "      #print(qa_df['text'][i], ',', qa_df['text'][j], '->', cosine_similarity(np.array([qa[i]]), np.array([qa[j]])))\n",
        "      similarities[-1].append(cosine_similarity(np.array([embedding[i]]), np.array([embedding[j]])))\n",
        "    #print()\n",
        "\n",
        "  similarities = np.array(similarities).squeeze()\n",
        "  #print(similarities)\n",
        "  sb.heatmap(similarities)\n",
        "\n",
        "plot_similarities(qa_df, qa)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TI2AFuM_EFX9"
      },
      "source": [
        "# A more complicated example"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 457
        },
        "id": "rLL388ui8C5R",
        "outputId": "642374ba-2616-4cf6-d0fc-800817042a29"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-6bc0e4c0-dffa-4f99-8cc6-8dc40b294d8d\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>text</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Where is the world cup?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>What color is the sky?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Where does the bear live?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>What is an apple?</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>The world cup is in Qatar</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>The world cup is in the moon</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>The previous world cup was in Russia</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>The sky is green</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>The sky is blue</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>The bear lives in the the woods</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>The bear lives in his apartment</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>An apple is a fruit</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>Apple is a company</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6bc0e4c0-dffa-4f99-8cc6-8dc40b294d8d')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-6bc0e4c0-dffa-4f99-8cc6-8dc40b294d8d button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-6bc0e4c0-dffa-4f99-8cc6-8dc40b294d8d');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ],
            "text/plain": [
              "                                    text\n",
              "0                Where is the world cup?\n",
              "1                 What color is the sky?\n",
              "2              Where does the bear live?\n",
              "3                      What is an apple?\n",
              "4              The world cup is in Qatar\n",
              "5           The world cup is in the moon\n",
              "6   The previous world cup was in Russia\n",
              "7                       The sky is green\n",
              "8                        The sky is blue\n",
              "9        The bear lives in the the woods\n",
              "10       The bear lives in his apartment\n",
              "11                   An apple is a fruit\n",
              "12                    Apple is a company"
            ]
          },
          "execution_count": 18,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "qa_df_confusing = pd.DataFrame({'text':\n",
        "  [\n",
        "   'Where is the world cup?',\n",
        "   'What color is the sky?',\n",
        "   'Where does the bear live?',\n",
        "   'What is an apple?',\n",
        "   'The world cup is in Qatar',\n",
        "   'The world cup is in the moon',\n",
        "   'The previous world cup was in Russia',\n",
        "   'The sky is green',\n",
        "   'The sky is blue',\n",
        "   'The bear lives in the the woods',\n",
        "   'The bear lives in his apartment',\n",
        "   'An apple is a fruit',\n",
        "   'Apple is a company'\n",
        "  ]})\n",
        "\n",
        "qa_df_confusing"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wuF-go3LEsLB"
      },
      "outputs": [],
      "source": [
        "from cohere import embeddings\n",
        "qa_confusing = co.embed(texts=list(qa_df_confusing['text']), model='large', truncate='LEFT').embeddings"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 468
        },
        "id": "RY4OnBhHELoV",
        "outputId": "be79b56f-79ea-46a6-afa1-e30016294be5"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "<div id=\"altair-viz-db76d27886fe4184a3819932fcd20804\"></div>\n",
              "<script type=\"text/javascript\">\n",
              "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
              "  (function(spec, embedOpt){\n",
              "    let outputDiv = document.currentScript.previousElementSibling;\n",
              "    if (outputDiv.id !== \"altair-viz-db76d27886fe4184a3819932fcd20804\") {\n",
              "      outputDiv = document.getElementById(\"altair-viz-db76d27886fe4184a3819932fcd20804\");\n",
              "    }\n",
              "    const paths = {\n",
              "      \"vega\": \"https://cdn.jsdelivr.net/npm//vega@5?noext\",\n",
              "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm//vega-lib?noext\",\n",
              "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm//vega-lite@4.17.0?noext\",\n",
              "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm//vega-embed@6?noext\",\n",
              "    };\n",
              "\n",
              "    function maybeLoadScript(lib, version) {\n",
              "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
              "      return (VEGA_DEBUG[key] == version) ?\n",
              "        Promise.resolve(paths[lib]) :\n",
              "        new Promise(function(resolve, reject) {\n",
              "          var s = document.createElement('script');\n",
              "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
              "          s.async = true;\n",
              "          s.onload = () => {\n",
              "            VEGA_DEBUG[key] = version;\n",
              "            return resolve(paths[lib]);\n",
              "          };\n",
              "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
              "          s.src = paths[lib];\n",
              "        });\n",
              "    }\n",
              "\n",
              "    function showError(err) {\n",
              "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
              "      throw err;\n",
              "    }\n",
              "\n",
              "    function displayChart(vegaEmbed) {\n",
              "      vegaEmbed(outputDiv, spec, embedOpt)\n",
              "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
              "    }\n",
              "\n",
              "    if(typeof define === \"function\" && define.amd) {\n",
              "      requirejs.config({paths});\n",
              "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
              "    } else {\n",
              "      maybeLoadScript(\"vega\", \"5\")\n",
              "        .then(() => maybeLoadScript(\"vega-lite\", \"4.17.0\"))\n",
              "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
              "        .catch(showError)\n",
              "        .then(() => displayChart(vegaEmbed));\n",
              "    }\n",
              "  })({\"config\": {\"view\": {\"continuousWidth\": 400, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-87638ccc33e29f13870384fccf72e470\"}, \"mark\": {\"type\": \"circle\", \"size\": 60}, \"encoding\": {\"tooltip\": [{\"field\": \"text\", \"type\": \"nominal\"}], \"x\": {\"field\": \"x\", \"scale\": {\"zero\": false}, \"type\": \"quantitative\"}, \"y\": {\"field\": \"y\", \"scale\": {\"zero\": false}, \"type\": \"quantitative\"}}, \"height\": 400, \"selection\": {\"selector002\": {\"type\": \"interval\", \"bind\": \"scales\", \"encodings\": [\"x\", \"y\"]}}, \"width\": 700, \"$schema\": \"https://vega.github.io/schema/vega-lite/v4.17.0.json\", \"datasets\": {\"data-87638ccc33e29f13870384fccf72e470\": [{\"text\": \"Where is the world cup?\", \"x\": 10.177486419677734, \"y\": 26.74079704284668}, {\"text\": \"What color is the sky?\", \"x\": 7.576291561126709, \"y\": -0.09173079580068588}, {\"text\": \"Where does the bear live?\", \"x\": 6.783506393432617, \"y\": -5.398342609405518}, {\"text\": \"What is an apple?\", \"x\": 2.893805503845215, \"y\": -16.37334632873535}, {\"text\": \"The world cup is in Qatar\", \"x\": 9.785728454589844, \"y\": 27.24041748046875}, {\"text\": \"The world cup is in the moon\", \"x\": 10.136798858642578, \"y\": 26.39845085144043}, {\"text\": \"The previous world cup was in Russia\", \"x\": 10.068984985351562, \"y\": 27.62099838256836}, {\"text\": \"The sky is green\", \"x\": 8.155501365661621, \"y\": -0.08989375084638596}, {\"text\": \"The sky is blue\", \"x\": 7.736617088317871, \"y\": 0.15234991908073425}, {\"text\": \"The bear lives in the the woods\", \"x\": 7.069847106933594, \"y\": -5.533956050872803}, {\"text\": \"The bear lives in his apartment\", \"x\": 7.422914505004883, \"y\": -5.302755355834961}, {\"text\": \"An apple is a fruit\", \"x\": 3.15231990814209, \"y\": -16.115108489990234}, {\"text\": \"Apple is a company\", \"x\": 3.4663479328155518, \"y\": -15.800969123840332}]}}, {\"mode\": 
\"vega-lite\"});\n",
              "</script>"
            ],
            "text/plain": [
              "alt.Chart(...)"
            ]
          },
          "execution_count": 21,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        " # UMAP reduces the dimensions from 1024 to 2 dimensions that we can plot\n",
        "reducer = umap.UMAP(n_neighbors=2)\n",
        "umap_embeds = reducer.fit_transform(qa_confusing)\n",
        "# Prepare the data to plot and interactive visualization\n",
        "# using Altair\n",
        "#df_explore = pd.DataFrame(data={'text': qa['text']})\n",
        "#print(df_explore)\n",
        "\n",
        "#df_explore = pd.DataFrame(data={'text': qa_df[0]})\n",
        "df_explore = qa_df_confusing\n",
        "df_explore['x'] = umap_embeds[:,0]\n",
        "df_explore['y'] = umap_embeds[:,1]\n",
        "\n",
        "# Plot\n",
        "chart = alt.Chart(df_explore).mark_circle(size=60).encode(\n",
        "    x=#'x',\n",
        "    alt.X('x',\n",
        "        scale=alt.Scale(zero=False)\n",
        "    ),\n",
        "    y=\n",
        "    alt.Y('y',\n",
        "        scale=alt.Scale(zero=False)\n",
        "    ),\n",
        "    tooltip=['text']\n",
        ").properties(\n",
        "    width=700,\n",
        "    height=400\n",
        ")\n",
        "chart.interactive()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 269
        },
        "id": "31_8qCIU-L3t",
        "outputId": "a74cf627-f975-47b8-b4b0-48aa66737882"
      },
      "outputs": [
        {
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAAD8CAYAAADUv3dIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAes0lEQVR4nO3de5hcVZnv8e+PTgLkwv2eDhA0XMUBjJEZHWAExgZ9iMo4Bo6PgJc454jX8UB44KDimVG843PwEhAcnAEG42WiZgRBLo4jmAABcyHQRCAdEgi3YAiYdPd7/tg7WGm6ald1raquqvw+PPvpqr13vbWa7ry1eu2136WIwMzMmmO70W6Amdm2xEnXzKyJnHTNzJrISdfMrImcdM3MmshJ18ysiZx0zczKkHSVpCclLSlzXJK+IalX0v2SjimK6aRrZlbe94CeCsdPAabl22zgW0UBnXTNzMqIiDuAZyqcMhO4JjJ3ArtI2rdSzDEpGziczU+tTHrLW89R/5AyHADdYyYlj3n3i6uTxvtNz4Sk8QDYTknDbX58c9J4AOcsT/+zaQfr+jckjffIxieSxgMYiMHkMdetX1H3L2UtOWfcnq/6EFkPdYu5ETG3hrebDKwqed6X71tT7gUNT7pmZq0qT7C1JNm6OemaWWcZHGjmu60GppQ87873leUxXTPrLAP91W/1mw+8N5/FcCywPiLKDi2Ae7pm1mEi4VizpOuAE4A9JPUBnwbGZu8T3wYWAKcCvcBG4JyimE66ZtZZBtMl3Yg4o+B4AB+uJaaTrpl1lgbMqkjJSdfMOktzL6TVrDDpSjqUbALw5HzXamB+RCxvZMPMzEakxXu6FWcvSDofuB4Q8Lt8E3CdpDkVXjdb0iJJi6685rqU7TUzqygG+qveRkNRT/f9wBERsdWtRpK+CiwFvjDci0onHKe+I83MrKKEF9IaoWie7iCw3zD7982PmZm1lhisfhsFRT3djwO3SHqIP99fvD/wauDcRjbMzGxE2vlCWkT8QtLBwAy2vpC2MCJa+zszs21Ti19IK5y9ENntHXc2oS1mZvUbpQtk1fI8XTPrLC1+Ic1J18w6SquPfDY86aYuOv6Lxd9OGg/g4EPekTzmmya+Kmm81y94LGk8gN3GTkwab9FTvUnjATx1+sHJY3btnfb7fvu8PyWNB/DwCxULVdXssIndSeMBLPvjquKTRkO7j+mambUVDy+YmTWRe7pmZk00kH6tvpScdM2ss7T48IKX6zGzzpLwNmBJPZJWSOodrsiXpAMk3SLpfkm3SSq8Yumka2adZXCw+q0CSV3A5cApwOHAGZIOH3Lal4FrIuK1wCXA54ua56RrZp0lUdIlK3/QGxErI2ITWZnbmUPOORz4Vf741mGOv8KIk66ksguwldbTXf1C30jfwsysZjGwueqtNFfl2+ySUJP5c6EvgD7+XINmi/uAd+aP3wFMkrR7pfbV09P9bLkDETE3IqZHxPTJE9JPyjYzK6uGMd3SXJVvc2t8t08Bx0u6FzierCBYxVviKs5ekHR/uUPA3jU2zsys8dLNXlgNTCl53p3ve1lEPE7e05U0ETg9Ip6rFLRoytjewFuAZ4fsF/DfxW02M2uydDdHLASmSZpKlmxnAWeWniBpD+CZvBrjBcBVRUGLku7PgIkRsXjoAUm3VdduM7MmStTTjYh+SecCNwJdwFURsVTSJcCiiJgPnAB8XlIAdwAfLopbVMT8/RWOnVnumJnZqEl4G3BELAAWDNl3ccnjecC8WmL6jjQz6yz9LmJuZtY823rBm+4xk5LGa0Tt2wdX/Dh5zEMOPT1pvC6lv49lw8BLSeMdtftBSeMBvP7m55PH7NKGpPHuOe8vksYDGFyXtubvoVc8mDQewIv9m5LHTKLFay+4p2tmnWVb7+mamTWVe7pmZk3knq6ZWRN59oKZWRNFjHYLKnLSNbPO4j
FdM7MmavGkWzj5U9Khkk7MK+iU7u9pXLPMzEYo4XI9jVAx6Ur6KPAfwEeAJZJKq6L/c4XXvVwY+ME//iFNS83MqjEwUP02CoqGFz4IvC4iNkg6EJgn6cCIuIysvOOw8kLAcwHOOvD01h7VNrPO0uLDC0VJd7uI2AAQEY9IOoEs8R5AhaRrZjZqWjzpFo3pPiHpqC1P8gT8NmAP4MhGNszMbETaeUwXeC+wtnRHRPRHxHuB4xrWKjOzEYrBqHorIqlH0gpJvZLmDHN8f0m3SrpX0v2STi2KWVTEvOxSvhHxm8IWm5k1W6LhBUldwOXAyWQrAS+UND8ilpWcdhFwQ0R8S9LhZAXPD6wU1/N0zayzpJuVMAPojYiVAJKuB2YCpUk3gJ3yxzsDjxcFddI1s86S7kLaZGBVyfM+4A1DzvkMcJOkjwATgJOKgjY86d794urik2rwpomvShoP0hccB1jxwA+TxpvUfULSeAD7Tdw9abwVz64qPqlG79nv2OQxU3v3N9YWn1SjNZvXJ4330JfS38u09NO9yWMmUUPSlTQbmF2ya24+5bVaZwDfi4ivSPpL4PuSXpOvDjws93TNrLPUUPCm9J6CYawGppQ87873lXo/0JPH+q2kHchmdz1Z7j3TrwFjZjaaBger3ypbCEyTNFXSOGAWMH/IOY8BJwJIOgzYAVhXKah7umbWWaqYClaNiOiXdC5wI9AFXBURSyVdAiyKiPnAPwJXSPoE2UW1syMqd7WddM2ssySsqRARC8imgZXuu7jk8TLgjbXEdNI1s44SLX4bsJOumXWWRMMLjeKka2adpd0XppQ0A4iIWJjf5tYDPJCPdZiZtZZ27ulK+jRwCjBG0i/J7sa4FZgj6eiI+Kcyr3t5wvG+k6ay2457pW21mVk5/aNTnLxaRT3dvwOOArYnqzbWHRHPS/oycBcwbNItnXD8mr2Pbe2PHTPrLG0+vNAfEQPARkkPR8TzABHxoqTW/s7MbNvUzsMLwCZJ4yNiI/C6LTsl7Qw46ZpZy2n3KWPHRcSfAIYUcBgLnNWwVpmZjVQ793S3JNxh9j8FPNWQFpmZ1aOdk66ZWdsZpaXVq+Wka2YdpZq1z0ZTw5Pub3omJI33+gWPJY0H0KX0FS5TFx3/Y99tSeMBPHfGOUnjjdtnt6TxAGbfkb7XMl5dSeM98OKapPEa4Yjzb0kec+KYHZLHvD9FkG096ZqZNVWbz14wM2sv7umamTVRiyddL9djZh0lBgar3opI6pG0QlKvpDnDHP+apMX59qCk54piuqdrZp0lUU9XUhdwOXAy2fLrCyXNz1eLACAiPlFy/keAo4viuqdrZh0lBqPqrcAMoDciVkbEJuB6YGaF888ArisK6qRrZp1lMKreJM2WtKhkm10SaTKwquR5X77vFSQdAEwFflXUvJqHFyRdExHvrfV1ZmZNUcOMsdIytHWaBczLqzJWVFTEfOga7wL+RtIuABFxWpnXvVzE/OvHHsLZBw/74WBmllz0J5unuxqYUvK8O983nFnAh6sJWtTT7QaWAVeSrekuYDrwlUovKv30WH/Wia09f8PMOku6eyMWAtMkTSVLtrOAM4eeJOlQYFfgt9UELRrTnQ7cDVwIrI+I24AXI+L2iLi9+rabmTVHqgtpEdEPnAvcCCwHboiIpZIukVT6V/4s4PqIqKqDWVTacRD4mqQf5F+fKHqNmdmoSngXcL4A74Ih+y4e8vwztcSsKoFGRB/wLklvBZ6v5Q3MzJqpo6qMRcTPgZ83qC1mZvVr7Xo3Hiows84S/aPdgsoan3S3U9JwC9+2C2+5Me3/1Q0DLyWNB7DfxN2Txjvk0NO56+idksbc5bqrk8YDePS4/5k03jSNTxoPYMdI+zv56vGHsoQXksZcH5uSxgPYMDDs6lsj9nT/hqTxUmnxFdjbr6ebOuG2i9QJtxFSJ9x2kTrhNkLqhNvSnHTNzJrHPV0zsyZy0jUza6IYSDtmn5qTrpl1FPd0zc
yaKAbd0zUza5qO6ulKehNZNfUlEXFTY5pkZjZykXgedmoVq4xJ+l3J4w8C/w+YBHx6uEXaSs59uRr791aUKz9pZpZeDFa/jYainu7YksezgZMjYp2kLwN3Al8Y7kVb1dM956TWrj5hZh1lsM1nL2wnaVeyHrEiYh1ARLwgadu8NczMWlq7X0jbmayIuYCQtG9ErJE0Md9nZtZSWj3pVhzTjYgDI+KgiJiaf12THxoE3tH45pmZ1Sai+q2IpB5JKyT1lruOJenvJS2TtFTStUUxRzRlLCI2An8YyWvNzBopVU9XUhdwOXAy2fLrCyXNj4hlJedMAy4A3hgRz0raqyhu0RppZmZtJUJVbwVmAL0RsTIiNgHXAzOHnPNB4PKIeDZ773iyKKiTrpl1lIEBVb2VTm/Nt9kloSYDq0qe9+X7Sh0MHCzpN5LulNRT1L6G35G2+fHNSeMteqo3aTyAo3Y/KHnMFc+uKj6pBuP22S1pPEhf//aAO76VNB7AT454xYrXdZvYtUPSeI0ogr8xcf3bHbvGJY0HMCHx/8dUark5onR66wiNAaYBJwDdwB2SjoyI5yq9wMysYyScvbAamFLyvDvfV6oPuCsiNgN/kPQgWRJeWC6ohxfMrKMknL2wEJgmaaqkccAsYP6Qc35C1stF0h5kww0rKwV1T9fMOkqqnm5E9Es6F7gR6AKuioilki4BFkXE/PzY30paBgwA/zsinq4U10nXzDrKwGC6P+AjYgGwYMi+i0seB/DJfKuKk66ZdZRqbnoYTU66ZtZRBlu8tKOTrpl1lHavp/sGSTvlj3eU9FlJP5V0qaSdm9NEM7Pqpay90AhFI85XARvzx5eRVR27NN93dbkXld7lcU3fmnKnmZklNxiqehsNhfV0I2JL3dzpEXFM/vi/JC0u96LSuzyeesvxLT6sbWadJOXshUYoat0SSefkj++TNB1A0sFA2vt7zcwSiBq20VDU0/0AcJmki4CngN9KWkVWBOIDjW6cmVmt2nr2QkSsB87OL6ZNzc/vi4gnmtE4M7NatfrshaqmjEXE88B9DW6LmVndRmmR36p5nq6ZdZRo8eUbnXTNrKP0d8LwQj3OWT4pabynTj84aTyA19/8fPKY79nv2KTxZt8xkDQewDSNTxqvEQXH711auM5fzfoXXJE03jcvSluwHuBWnk0a7/DtdkoaD+DXm9cmj5mCe7pmZk3kMV0zsyZq9Z5ua9+6YWZWo8EatiKSeiStkNQrac4wx8+WtE7S4nwrvH/BPV0z6ygDiXq6krqAy4GTydZCWyhpfkQsG3Lqv0fEudXGdU/XzDrKoKrfCswAeiNiZURsAq4HZtbbPiddM+sog6jqrcBkspIHW/Tl+4Y6XdL9kuZJmjLM8a046ZpZR6ml4E1pGdp8m13j2/0UODAiXgv8EviXohdUHNOV9FHgxxGRfiKimVkD1DJlrLQM7TBWA6U91+58X+nrS1f+vRL4YtF7FvV0PwfcJenXkv6XpD2LAsLWnx6PbnismpeYmSUxKFW9FVgITJM0VdI4YBYwv/QESfuWPD0NWF4UtCjpriTL7p8DXgcsk/QLSWdJKnurWUTMjYjpETH9gIn7F7XBzCyZgRq2SvIFHM4FbiRLpjdExFJJl0g6LT/to5KWSroP+ChwdlH7iqaMRUQMAjcBN0kaC5wCnAF8Gaiq52tm1ixVzEqoWkQsABYM2XdxyeMLgAtqiVmUdLdqfkRsJutez5cS37hvZpZAFbMSRlVR0n13uQMRsbHcMTOz0dLqizIWrRzxYLMaYmaWQsrhhUbwbcBm1lFcZczMrIkG3NNNq2vvieljakPymKmNV1fymDsmrrA/sWuHpPEgfcFxgDGnfjBpvIH/c3HxSTWawNik8fYaTP/7s09X+n+LKbina2bWRE66ZmZN1OJLpDnpmllncU/XzKyJ0i/hmpaTrpl1FM/TNTNrorYeXigpZ/Z4RNws6Uzgr8gq7szNazGYmbWMtk66wN
X5OeMlnQVMBH4EnEi2ftBZjW2emVlt2rr2AnBkRLxW0hiyiun7RcSApH8F7iv3onzJi9kAR+56JK6pa2bN0upjukVFzLfLhxgmAeOBnfP920P5W2ZcxNzMRkuqIuaNUpR0vws8ACwGLgR+IOkKsmUsrm9w28zMajZIVL0VkdQjaYWkXklzKpx3uqSQNL0oZlFpx69J+vf88eOSrgFOAq6IiN8VttjMrMlSXUiT1AVcDpxMtvz6QknzI2LZkPMmAR8D7qombuES7BHxeEQ8nj9+LiLmOeGaWauqZQn2AjOA3ohYGRGbyP66nznMeZ8DLgVeqqZ9hUnXzKydDNawla5cnm+zS0JNBlaVPO/L971M0jHAlIj4ebXt880RZtZR+lX9pLGImAvMHcn7SNoO+CpVrABcqu2S7tvn/Sl5zHvO+4vkMd/9jbVJ4z3w4pqk8QBeGj8labwNA1X9dVWTb160qvikGqWuf/uJuy9JGg9g03c+kzTep65I/7Pp27Q+ecwUEs7TXQ2U/iPpzvdtMQl4DXCbJIB9yBbtPS0iFpUL2nZJ18yskoR3pC0EpkmaSpZsZwFnbjkYEeuBPbY8l3Qb8KlKCRecdM2sw1QzFawaEdEv6VzgRqALuCoilkq6BFgUEfNHEtdJ18w6SsrbgCNiAbBgyL5hx6ci4oRqYjrpmllHafeCN2ZmbWWgxUveOOmaWUdxT9fMrImi3Xu6kg4C3kk2X20AeBC4NiKeb3DbzMxq1uo93Yq3AUv6KPBtYAfg9WQlHacAd0o6ocLrXr617tENjyVsrplZZSmrjDVCUe2FDwKnRMT/JasudkREXAj0AF8r9yLX0zWz0ZKw4E1DVDOmO4ZsWGF7suV6iIjHJJUtYm5mNlr623xM90qyGpJ3AX9NVr4MSXsCzzS4bWZmNWvrC2kRcZmkm4HDgK9ExAP5/nXAcU1on5lZTVr9Qlrh8EJELAWWNqEtZmZ1a+uerplZu2n7nq6ZWTsZiG28p7uuf0PSeA+/kL6Y9+C6icljrtncmgWeS62PTUnjbRxIX2D+Vp5NHnMCaSfepC44DjDuQ2ljrv3Ox5LGA+hq0dW+Rmv+bbXc0zWzjuIxXTOzJmr1Md3W/PvAzGyEUt4GLKlH0gpJvZLmDHP8HyT9XtJiSf8l6fCimE66ZtZRoob/KpHUBVwOnAIcDpwxTFK9NiKOjIijgC+SrQ5ckYcXzKyjJJy9MAPojYiVAJKuB2YCy7acMKTa4gSqKOngpGtmHaWW2QuSZgOzS3bNjYi5+ePJwKqSY33AG4aJ8WHgk8A44M1F71lU2nFnSV+Q9ICkZyQ9LWl5vm+XouBmZs02WMNWWhEx3+aWCVtWRFweEa8CzgcuKjq/aEz3BuBZ4ISI2C0idgf+Jt93Q62NMzNrtFRjusBqsvrhW3Tn+8q5Hnh7UdCipHtgRFwaEWu37IiItRFxKXBAuReVFjF/cuPjRW0wM0sm4eyFhcA0SVMljQNmAfNLT5A0reTpW4GHioIWJd1HJZ0nae+SN9lb0vlsPdaxldIu+17j9ytqg5lZMhFR9VYQpx84F7gRWA7cEBFLJV0i6bT8tHMlLZW0mGxc96yi9hVdSHs3MAe4XdJe+b4nyLL9u4qCm5k1W8ol2CNiAbBgyL6LSx7XfH91UT3dZ8kGh88fekzSOcDVtb6hmVkjtXrthXpujvhsslaYmSWSanihUSr2dCXdX+4QsHeZY2Zmo6bVe7pFY7p7A2+BV9TXE/DfDWmRmVkd2r3K2M+AiRGxeOgBSbc1pEVmZnVo9SLmavS4xj67HJb0DQ6ekH4K2soXn0ge86Ev9SSNd8T5tySNB9C9/W5J4z2duGA9wFt3PCh5zL0Gu5LGe3i7tMXgAdYOvpg03g33XJY0HkDfiR9KHnPqfb9UvTHeOPnNVeec36z+Vd3vVyvXXjCzjtLuY7pmZm1ltGYlVMtJ18w6inu6ZmZN1O6zF8zM2spAtPYqaU66ZtZRWn1Md8
S3AUv6z5QNMTNLIeXClI1QdBvwMeUOAUelb46ZWX3afUx3IXA7WZIdquxyPaXrDk3acR/Gj/PKPmbWHIMtPrxQlHSXAx+KiFdUQ5dUsYg5MBfS35FmZlZJyp6upB7gMqALuDIivjDk+CeBDwD9wDrgfRHxaKWYRWO6n6lwzkeqaLOZWVMNxGDVWyWSuoDLgVOAw4EzJB0+5LR7gekR8VpgHvDFovZVTLoRMS8iVpQ5vGtRcDOzZhuMqHorMAPojYiVEbGJbOHJmaUnRMStEbExf3on2eKVFbmIuZl1lISrAU9m67Ug+/J95bwfKJzV5SLmZtZRarmQVnrRPzc3vyZVE0nvAaYDxxed6yLmZtZRarmQVnrRfxirgSklz7vzfVuRdBJwIXB8RPyp6D1dxNzMOspADKQKtRCYJmkqWbKdBZxZeoKko4HvAD0R8WQ1QRtexHzPnQ9J+gaNaO+L/emLUN++W9p7R97Xn77Qeup71Cd07ZA0HkCX6rnsMLx9uiYmjdfXvz5pPICuui63vNJ1e45NGg+g+5bvJI85do+D6i4qvv9uR1adJB575vcV30/SqcDXyaaMXRUR/yTpEmBRRMyXdDNwJLBmS8iIOK1STNdeMLOOkvL23ohYACwYsu/ikscn1RrTSdfMOkqrF7xx0jWzjtLutwGbmbWVdi94Y2bWVlzE3MysiVp9TLfivBRJO0n6vKTvSxo6P+2bjW2amVntEtZeaIiiyYBXk9199kNglqQfSto+P3ZsuRdJmi1pkaRFL216LlFTzcyKRUTV22goSrqviog5EfGTfMLvPcCvJO1e6UURMTcipkfE9B1cwNzMmqitl+sBtpe0XUQ2Mp3fjbEauANIe1uPmVkCbT2mC/wUeHPpjoj4HvCPQPp7Z83M6pSqiHmjVOzpRsR5Zfb/QtI/N6ZJZmYj1+o3R7iIuZl1lFa/kOYi5mbWUdr9jjQXMTezttLqF9JcxNzMOkqrj+nWNP7R6A2Y3crxttU2bqvfdzu0cVv+vtt1S1+Wvz6zi08Z1XiNiNkObWxETLexdWO2QxvbVqslXTOzjuaka2bWRK2WdGteb77J8RoRsx3a2IiYbmPrxmyHNrathq8GbGZmf9ZqPV0zs47mpGtm1kQtkXQl9UhaIalX0pwE8a6S9KSkJYnaN0XSrZKWSVoq6WMJYu4g6XeS7stjJqllIalL0r2SfpYo3iOSfi9psaRFiWLuImmepAckLZf0l3XEOiRv25bteUkfT9DGT+Q/lyWSrpO0Q53xPpbHWjrS9g33ey1pN0m/lPRQ/nXXBDHflbdzUNL0BPG+lP+s75f0Y0nbdpHt0Z4oDHQBDwMHAeOA+4DD64x5HHAMsCRRG/cFjskfTwIeTNBGkd3tBzAWuAs4NkFbPwlcC/ws0ff+CLBH4p/5vwAfyB+PA3ZJ+Lu0FjigzjiTgT8AO+bPbwDOriPea4AlwHiyu0BvBl49gjiv+L0GvgjMyR/PAS5NEPMw4BDgNmB6gnh/C4zJH19aaxs7bWuFnu4MoDciVkbEJuB6YGY9ASPiDuCZFI3L462JiHvyx38ElpP9w6wnZkTEhvzp2Hyr66qmpG7grcCV9cRpJEk7k/3D/C5ARGyKiFRrOp0IPBwRjyaINQbYUdIYsmT5eB2xDgPuioiNEdEP3A68s9YgZX6vZ5J9iJF/fXu9MSNieUSsqLV9FeLdlH/fAHcC3SOJ3SlaIelOBlaVPO+jzoTWSJIOBI4m65nWG6tL0mLgSeCXEVFvzK8D5wEpqzMHcJOkuyWluKtoKrAOuDofBrlS0oQEcQFmAdfVGyQiVgNfBh4D1gDrI+KmOkIuAf5a0u6SxgOnAlPqbWdu74hYkz9eS+tX/3sf8J+j3YjR1ApJt21Imki2SOfHI+L5euNFxEBEHEX2yT9D0mvqaNvbgCcj4u562zXEmyLiGOAU4MOSjqsz3hiyPz+/FRFHAy+Q/VlcF0njgNOAHySItStZD3IqsB8wQd
J7RhovIpaT/Vl9E/ALYDEwUG87h3mfoM6/lhpJ0oVAP/Bvo92W0dQKSXc1W3/qd+f7WoqksWQJ998i4kcpY+d/Xt8K9NQR5o3AaZIeIRuiebOkf03QttX51yeBH5MNB9WjD+gr6dXPI0vC9ToFuCcinkgQ6yTgDxGxLiI2Az8C/qqegBHx3Yh4XUQcR1Yq9cEE7QR4QtK+APnXJxPFTUrS2cDbgP+Rfzhss1oh6S4EpkmamvdWZgHzR7lNW5EksjHI5RHx1UQx99xyFVfSjsDJwAMjjRcRF0REd0QcSPb/8FcRMeLeWd6uCZImbXlMdkGkrhkhEbEWWCXpkHzXicCyemLmziDB0ELuMeBYSePzn/2JZOP4IyZpr/zr/mTjudfW3crMfOCs/PFZwH8kipuMpB6yYa/TImLjaLdn1I32lbz8Q+9Usk/+h4ELE8S7jmwsbjNZz+r9dcZ7E9mfbfeT/Wm4GDi1zpivBe7NYy4BLk74//MEEsxeIJtRcl++LU3xs8njHgUsyr/3nwC71hlvAvA0sHPC/4efJfsQXAJ8H9i+zni/JvtwuQ84cYQxXvF7DewO3AI8RDYrYrcEMd+RP/4T8ARwY53xesmu22z5t/PtVD+ndtx8G7CZWRO1wvCCmdk2w0nXzKyJnHTNzJrISdfMrImcdM3MmshJ18ysiZx0zcya6P8DLNneR9QzIMEAAAAASUVORK5CYII=",
            "text/plain": [
              "<Figure size 432x288 with 2 Axes>"
            ]
          },
          "metadata": {
            "needs_background": "light"
          },
          "output_type": "display_data"
        }
      ],
      "source": [
        "plot_similarities(qa_df_confusing, qa_confusing)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Nr-WbMapyxJb"
      },
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BJqV78xRztDP",
        "outputId": "66cf8551-59a1-49fa-b7d2-630f50297e76"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "1"
            ]
          },
          "execution_count": 1,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "WEzyO2ClayHS"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
